# -*- coding: utf-8 -*-

# Scrapy settings for ArticleSpider project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://doc.scrapy.org/en/latest/topics/settings.html
#     https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://doc.scrapy.org/en/latest/topics/spider-middleware.html
import os
import sys

# Name of the bot implemented by this Scrapy project.
BOT_NAME = "ArticleSpider"

# The same module is used both for generating new spiders and for
# discovering existing ones.
NEWSPIDER_MODULE = "ArticleSpider.spiders"
SPIDER_MODULES = [NEWSPIDER_MODULE]


# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'ArticleSpider (+http://www.yourdomain.com)'

# robots.txt rules are NOT obeyed by this project.
ROBOTSTXT_OBEY = False  # previously True

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Delay, in seconds, between requests to the same website (default: 0).
# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
# See also the autothrottle settings and docs.
DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Scrapy's built-in cookie handling is switched off (it is enabled by default).
COOKIES_ENABLED = False

# Fixed cookie jar, one entry per line. The PRE_SITE / PRE_LAND values point
# at www.lagou.com, so this looks like a captured browser session.
# NOTE(review): presumably project code attaches these to requests itself,
# since COOKIES_ENABLED is False - confirm against the spiders/middlewares.
# NOTE(review): these appear to be login/session tokens committed to source;
# consider whether they should live outside version control.
COOKIES = {
    '_ga': 'GA1.2.750199924.1533563172',
    '_gid': 'GA1.2.876361711.1533563172',
    'user_trace_token': '20180806214624-1bdbc0d0-997f-11e8-b72d-525400f775ce',
    'LGUID': '20180806214624-1bdbc564-997f-11e8-b72d-525400f775ce',
    'index_location_city': '%E5%85%A8%E5%9B%BD',
    'WEBTJ-ID': '20180807172252-16513b23ca85-001a28ca5ba692-444a002e-1049088-16513b23ca976',
    'JSESSIONID': 'ABAAABAAAFCAAEG9C1883AF097DA6EE23D3E4F9AD22793D',
    'Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6': '1533563171,1533633773,1533636706',
    'X_HTTP_TOKEN': '4235765fc7540813f96fb983e97ae2fa',
    'ab_test_random_num': '0',
    '_putrc': '169F433E50531B20123F89F2B170EADC',
    'login': 'true',
    'hasDeliver': '0',
    'gate_login_token': 'b434e7537e5b7f3010b67dc54bbe85134d93f0339417dd0ce9021ebe02eb0833',
    'unick': '%E6%8B%89%E5%8B%BE%E7%94%A8%E6%88%B77116',
    # The value below really does start with a double-quote character and has
    # no closing one; it is reproduced exactly as it was.
    'mds_login_authToken': '"K4i20OPux7Kiv3leAko5/GHZ6P0/xIoVHnyLnfhjaGibI+jr1W5wJfkbzhI1DUcjrrNOZVF3uarSuprrNpLVFSBFBiF0JkHWLtGJvyP0ZYIQaDwLmqBlE8f2Z9OuxYmL3Ov4mINttxjoHeWc115OjR+A299vKg5KYI2KwVp3zTB4rucJXOpldXhUiavxhcCELWDotJ+bmNVwmAvQCptcy5e7czUcjiQC32Lco44BMYXrQ+AIOfEccJKHpj0vJ+ngq/27aqj1hWq8tEPFFjdnxMSfKgAnjbIEAX3F9CIW8BSiMHYmPBt7FDDY0CCVFICHr2dp5gQVGvhfbqg7VzvNsw',
    'sajssdk_2015_cross_new_user': '1',
    'sensorsdata2015jssdkcross': '%7B%22distinct_id%22%3A%2216513fc258d2d-07eeecd0ac36c9-444a002e-1049088-16513fc258e27f%22%2C%22%24device_id%22%3A%2216513fc258d2d-07eeecd0ac36c9-444a002e-1049088-16513fc258e27f%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D',
    'TG-TRACK-CODE': 'index_navigation',
    'LGSID': '20180807191701-683e1516-9a33-11e8-b78e-525400f775ce',
    'PRE_UTM': '',
    'PRE_HOST': '',
    'PRE_SITE': 'https%3A%2F%2Fwww.lagou.com%2Fjobs%2Flist_python%2520%3FlabelWords%3D%26fromSearch%3Dtrue%26suginput%3D',
    'PRE_LAND': 'https%3A%2F%2Fwww.lagou.com%2Fjobs%2Flist_python%25E6%2595%25B0%25E6%258D%25AE%25E5%2588%2586%25E6%259E%2590%3Foquery%3Dpython%26fromSearch%3Dtrue%26labelWords%3Drelative',
    '_gat': '1',
    'SEARCH_ID': '74bc268ea1054d3b86525570de2cc482',
    'LGRID': '20180807192813-f8569b0b-9a34-11e8-b78e-525400f775ce',
    'Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6': '1533641280',
}
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Default request headers, overriding Scrapy's own defaults. The values mimic
# a desktop Chrome 66 browser.
# NOTE(review): 'br' is advertised in Accept-Encoding; Scrapy can presumably
# only decode Brotli bodies when a brotli package is installed - confirm the
# deployment has one, otherwise responses may arrive undecoded.
DEFAULT_REQUEST_HEADERS = {
    'Connection': 'keep-alive',
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8'
    ),
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'user-agent': (
        'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'
    ),
}

# Enable or disable spider middlewares
# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    'ArticleSpider.middlewares.ArticlespiderSpiderMiddleware': 543,
#}

# Enable or disable downloader middlewares
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
# Every entry is commented out, so this mapping is currently empty; the
# entries are kept for reference.
DOWNLOADER_MIDDLEWARES = {
    # 'ArticleSpider.middlewares.ArticlespiderDownloaderMiddleware': 543,
    # 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
    # 'ArticleSpider.middlewares.rotate_user_agent.RandomUserAgentMiddleware': 400,
    # 'ArticleSpider.middlewares.RandomProxyMiddleware': 120,
    # 'ArticleSpider.middlewares.RandomUserAgentMiddleware': 300,
    # 'ArticleSpider.middlewares.JspageMiddleware': 300,
}

# Enable or disable extensions
# See https://doc.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
# Only the Elasticsearch pipeline is active; the alternatives that were tried
# before are left commented out for reference.
ITEM_PIPELINES = {
    # 'scrapy.pipelines.images.ImagesPipeline': 1,
    # 'ArticleSpider.pipelines.ArticleImagesPipeline': 2,
    # 'ArticleSpider.pipelines.JsonWithEncodingPipeline': 3,
    # 'ArticleSpider.pipelines.JsonExporterPipeline': 4,
    # 'ArticleSpider.pipelines.MysqlPipeline': 5,
    # 'ArticleSpider.pipelines.MysqlTwistePipeline': 6,
    'ArticleSpider.pipelines.ElasticSearchPipeline': 6,
}

# Item field that the images pipeline reads image URLs from.
IMAGES_URLS_FIELD = 'img_url'

# Directory containing this settings module. __file__ is made absolute first:
# when it is relative, os.path.dirname() yields a relative (possibly empty)
# path, so BASE_DIR and IMAGES_STORE would depend on the current working
# directory instead of on where this file lives.
project_dir = os.path.dirname(os.path.abspath(__file__))

# Images are stored in an "images" folder next to this file.
IMAGES_STORE = os.path.join(project_dir, 'images')

# Parent directory of the one holding this file.
BASE_DIR = os.path.dirname(project_dir)

# Put <BASE_DIR>/ArticleSpider at the front of the import search path.
# NOTE(review): presumably so modules inside the package can be imported by
# their bare name - confirm against the project's imports.
sys.path.insert(0, os.path.join(BASE_DIR, 'ArticleSpider'))
# IMAGES_MIN_HEIGHT = 100
# IMAGES_MIN_WIDTH = 100
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False



# Enable and configure HTTP caching (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
# MySQL connection parameters. Each one can be overridden with an environment
# variable of the same name so real credentials need not be committed to
# source control; the fallbacks are the values that used to be hard-coded.
MYSQL_HOST = os.environ.get('MYSQL_HOST', 'localhost')
MYSQL_USER = os.environ.get('MYSQL_USER', 'root')
MYSQL_PASSWORD = os.environ.get('MYSQL_PASSWORD', '0000')
MYSQL_DB = os.environ.get('MYSQL_DB', 'crawed')

# strftime-style patterns for date-time and date values.
# NOTE(review): presumably used when formatting values for SQL - confirm.
SQL_DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
SQL_DATE_FORMAT = "%Y-%m-%d"

# NOTE(review): presumably selects the user-agent family for a random
# user-agent middleware - verify against the middleware code.
RANDOM_UA_TYPE = "random"

# Host of the Elasticsearch server; overridable through the ES_HOST
# environment variable, defaulting to the previous local address.
ES_HOST = os.environ.get('ES_HOST', "127.0.0.1")